{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 🧠 A quick useful util method - Measures and prints the token generation speed (in tokens per second) for evaluating your LLM"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import time\n",
    "\n",
    "def measure_token_speed(model, tokenizer, prompts):\n",
    "    \"\"\"\n",
    "    Parameters:\n",
    "    model (transformers.PreTrainedModel): The language model to be used for token generation.\n",
    "    tokenizer (transformers.PreTrainedTokenizer): The tokenizer corresponding to the model,\n",
    "                                                  used for tokenizing the prompts.\n",
    "    prompts (list of str): A list of string prompts for which the token generation speed is measured.\n",
    "\n",
    "    This function iterates over each prompt, generates tokens using the model, and measures the\n",
    "    time taken for each prompt. It then calculates and prints the tokens per second for each prompt\n",
    "    and the average speed across all prompts.\n",
    "\n",
    "    Note: The function assumes that the model and tokenizer are compatible and that the model is\n",
    "    already loaded onto the appropriate device (e.g., CUDA for GPU acceleration).\n",
    "    \"\"\"\n",
    "    duration = 0.0\n",
    "    total_length = 0\n",
    "\n",
    "    for p in prompts:\n",
    "        try:\n",
    "            inputs = tokenizer([p], return_tensors=\"pt\").to('cuda')\n",
    "            start_time = time.time()\n",
    "            output = model.generate(**inputs, max_new_tokens=1000)\n",
    "            prompt_duration = time.time() - start_time\n",
    "            tok_sec_prompt = round(len(output[0]) / prompt_duration, 3)\n",
    "            print(f\"Prompt: {p} --- {tok_sec_prompt} tokens/seconds ---\")\n",
    "            duration += prompt_duration\n",
    "            total_length += len(output[0])\n",
    "\n",
    "            # Optional: Clear up GPU memory here if needed\n",
    "            # torch.cuda.empty_cache()\n",
    "        except Exception as e:\n",
    "            print(f\"Error processing prompt '{p}': {e}\")\n",
    "\n",
    "    tok_sec = round(total_length / duration, 3)\n",
    "    print(f\"Average --- {tok_sec} tokens/seconds ---\")\n",
    "\n",
    "# Usage Example\n",
    "# model = [Your Model]\n",
    "# tokenizer = [Your Tokenizer]\n",
    "# prompts = [\"AI will rule \", \"I am in Love with \",\n",
    "# \"Awesome weather and \", \"My favourite movie is \"]\n",
    "# measure_token_speed(model, tokenizer, prompts)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": []
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
